clear all
set more off
set trace off
cap log close
set scheme sj

*Set window for FE/Value analysis here

local win = "w_3_stack"

foreach w in `win' {
global windows "`w'" 

*Set unemployment rate here
global U "U_internal" 

cap log close

log using "$log/08_05_dom_job_analysis_${windows}_${U}_${S_DATE}_BC_no_drop", text replace

********************************************************************************
* Load Data and generate log based on specific options *
********************************************************************************

*Log for specific options	
dis "$S_DATE" " " "$S_TIME"
dis "Unemploymet rate is $U , windows for FE/Value analysis are $windows "

use train_start train_length year_entry region_entry german gender train_end_age month_entry train_edu wage_ft aearnings aearnings_ft spell_length_ft work_at_train work_in_train_occ work_in_train_ind region_entry year_entry potential_experience year      dom_job train_occ persnr train_ind wage current_ind region_current empl_current U_internal dom_job_ft betnr beruf certificate same_train_occ no_trainings using "$temp/analysis_pt_new_no_drop", clear


********************************************************************************
* Merge in chosen AKM-Values
********************************************************************************

gen ffe = .
gen re = .
gen cod = .

	if "$windows"=="w_3_stack" {
		forval t = 1999(3)2017 {	
			merge m:1 betnr using "$FE//firmfe_value_new_ver`t'3_bc.dta", keep(1 3) nogen
			replace ffe = firm_fe if year>=`t'-1 & year<=`t'+1
			replace re = rent if year>=`t'-1 & year<=`t'+1
			replace cod = cd if year>=`t'-1 & year<=`t'+1
			drop firm_fe rent cd
		}		
	}

rename ffe firm_fe
rename re rents
rename cod cd

keep if !missing(firm_fe)
keep if !missing(cd)
keep if dom_job_ft == 1 

********************************************************************************
* Deflation of Nominal Variables *
********************************************************************************

*Consumer Price Index West Germany 1975 - 1991 (base year 1995)
gen cpi=.
label variable cpi "Consumer Price Index"
replace cpi = 54.5 if year == 1975
replace cpi = 56.8 if year == 1976
replace cpi = 58.9 if year == 1977
replace cpi = 60.5 if year == 1978
replace cpi = 63.0 if year == 1979
replace cpi = 66.4 if year == 1980
replace cpi = 70.6 if year == 1981
replace cpi = 74.3 if year == 1982
replace cpi = 76.7 if year == 1983
replace cpi = 78.6 if year == 1984
replace cpi = 80.2 if year == 1985
replace cpi = 80.1 if year == 1986
replace cpi = 80.3 if year == 1987
replace cpi = 81.3 if year == 1988
replace cpi = 83.6 if year == 1989
replace cpi = 85.8 if year == 1990
replace cpi = 89.0 if year == 1991

*Set 2015 as base year for the period 1975 - 1991
replace cpi = (cpi/89.0)*65.5		// see Statistisches Bundesamt (2019)

*Consumer Price Index Germany (West and East, after 1991)
replace cpi =  68.8 if year == 1992
replace cpi =  71.9 if year == 1993
replace cpi =  73.8 if year == 1994
replace cpi =  75.1 if year == 1995
replace cpi =  76.1 if year == 1996
replace cpi =  77.6 if year == 1997
replace cpi =  78.3 if year == 1998
replace cpi =  78.8 if year == 1999
replace cpi =  79.9 if year == 2000
replace cpi =  81.5 if year == 2001
replace cpi =  82.6 if year == 2002
replace cpi =  83.5 if year == 2003
replace cpi =  84.9 if year == 2004
replace cpi =  86.2 if year == 2005
replace cpi =  87.6 if year == 2006
replace cpi =  89.6 if year == 2007
replace cpi =  91.9 if year == 2008
replace cpi =  92.2 if year == 2009
replace cpi =  93.2 if year == 2010
replace cpi =  95.2 if year == 2011
replace cpi =  97.1 if year == 2012
replace cpi =  98.5 if year == 2013
replace cpi =  99.5 if year == 2014
replace cpi = 100.0 if year == 2015
replace cpi = 100.5 if year == 2016
replace cpi = 102.0 if year == 2017
replace cpi = 103.8 if year == 2018
replace cpi = 105.3 if year == 2019

*Deflate wages, marginal part-time income threshold and contribution assessment ceiling (base year 2015)
gen wage_ft_defl      = 100 * wage_ft / cpi
drop cpi

********************************************************************************
* Define Main Analysis Sample 
********************************************************************************
keep if inlist(train_edu,1,3)
drop if year_entry == 1997

su potential_experience
drop if potential_experience == 20 
drop if potential_experience < 0

********************************************************************************
* Merge in and define additional varaibles
********************************************************************************

*High-skil occupation designation
merge m:1 train_occ using "$temp/high_skill"
drop if _merge == 2
drop _merge


*Logged earnings
gen earnings_ft_defl = spell_length_ft * wage_ft_defl
gen log_earnings_ft_defl  = log(earnings_ft_defl)

*East-West
tab region_entry, mis
gen east = inrange(region_entry,11,16) // counting Berlin as East
replace east = . if missing(region_entry)

*Same region indicator
gen byte same_region = (region_current==region_entry)

********************************************************************************
* Parallel sample construction to main file
********************************************************************************
gen tag_censored = (year(train_start)==1998 & month(train_start)==1)
drop if tag_censored == 1
gen log_length = log(train_length)
bysort persnr: gen n = _n

compress
	
drop n train_start tag_censored


********************************************************************************
* Obtain components needed for PDV calculations *
********************************************************************************

*Main decomposition baseline outcome (full periode)
qui sum potential_experience if dom_job_ft == 1 & !missing(firm_fe) &  !missing(cd)
global maxe = r(max)
global maxfirste = 9 
global maxseconde = 19

tabstat earnings_ft_defl if dom_job_ft == 1 & !missing(firm_fe) &  !missing(cd), by(potential_experience) save
local maxe1 = $maxe + 1
forval ep1 = 1/`maxe1' {
	local e = `ep1'-1
	mat a = r(Stat`ep1')
	global earnings_ft_defl`e' = a[1,1]
}

*Unemployment rate SD
su $U if year == year_entry, de
global sdu = r(sd) 

*Interest Rate
global R = 1.05


********************************************************************************
* Main Decomposition 
********************************************************************************
gen nonfirm = log_earnings_ft_defl - firm_fe

local def_out 		"log_earnings_ft_defl firm_fe rents cd nonfirm" 
local fe_absorb = 	"year_entry potential_experience year german gender train_end_age month_entry train_edu train_occ region_entry"


foreach outcome of varlist `def_out' {

	*Regression
	reghdfe `outcome' i.potential_experience#c.${U} if !missing(firm_fe) & !missing(cd) & dom_job_ft == 1, absorb(`fe_absorb') vce(cl region_entry)

	
		matrix coef1 = e(b)
		matrix var1  = e(V)
		matrix coef2 = e(b)
		matrix var2  = e(V)
		gen byte esample1 = e(sample)
		gen byte esample2 = e(sample)
		
	if "`outcome'" == "log_earnings_ft_defl" {
		
		*Cumulation (first 10 years)
		local denom = 0
		forvalues e = 0/$maxfirste {
			local denom = `denom' + ${earnings_ft_defl`e'} *(($R )^(-`e'))	
		}
		local command_1 "0"
		forvalues e = 0/$maxfirste {  // 
			local exp = ${R}^(-`e')
			local command_1 = "`command_1' + ${earnings_ft_defl`e'} * (1 + $sdu * `e'.potential_experience#c.$U ) * 100 * ( `exp' / `denom')"
		}
		distinct betnr if e(sample)
		local firms = r(ndistinct)
		distinct persnr if e(sample)
		local workers = r(ndistinct)
		distinct persnr betnr if e(sample)
		local pairs = r(ndistinct)
		eststo `outcome'_1: lincomest `command_1' - 100 
		estadd sca pairs = `pairs' : `outcome'_1
		estadd sca firms = `firms' : `outcome'_1
		estadd sca workers = `workers' : `outcome'_1
		
		*Cumulation (second ten years)
		ereturn post coef1 var1, esample(esample1)
		local denom = 0
		local startseconde = $maxfirste + 1
		forvalues e = `startseconde'/$maxseconde {
			local denom = `denom' + ${earnings_ft_defl`e'} *(($R )^(-`e'))	
		}
		local command_2 "0"
		forvalues e = `startseconde'/$maxseconde {  // 
			local exp = ${R}^(-`e')
			local command_2 = "`command_2' + ${earnings_ft_defl`e'} * (1 + $sdu * `e'.potential_experience#c.$U ) * 100 * ( `exp' / `denom')"
		}
		distinct betnr if e(sample)
		local firms = r(ndistinct)
		distinct persnr if e(sample)
		local workers = r(ndistinct)
		distinct persnr betnr if e(sample)
		local pairs = r(ndistinct)
		eststo `outcome'_2: lincomest `command_2' - 100
		estadd sca pairs = `pairs' : `outcome'_2
		estadd sca firms = `firms' : `outcome'_2
		estadd sca workers = `workers' : `outcome'_2
		
		*Summary statistics
		tabstat log_earnings_ft_defl log_wage_ft_defl firm_fe rents cd work_at_train work_in_train_occ work_in_train_ind same_region if e(sample) == 1, by(potential_experience) stat(mean N semean)
	
	}
	
	
	if "`outcome'" == "firm_fe" | "`outcome'" == "rents" | "`outcome'" == "cd" | "`outcome'" == "nonfirm" {
		
		*Cumulation (first ten years)
		local denom = 0
		forvalues e = 0/$maxfirste {
			local denom = `denom' + ${earnings_ft_defl`e'} *(($R )^(-`e'))	
		}
		local command_1 "0"
		forvalues e = 0/$maxfirste {  // 
			local exp = ${R}^(-`e')
			local command_1 = "`command_1' + ${earnings_ft_defl`e'} * (1 + $sdu * `e'.potential_experience#c.$U ) * 100 * ( `exp' / `denom')"
		}
		distinct betnr if e(sample)
		local firms = r(ndistinct)
		distinct persnr if e(sample)
		local workers = r(ndistinct)
		distinct persnr betnr if e(sample)
		local pairs = r(ndistinct)
		eststo `outcome'_1: lincomest `command_1' - 100
		estadd sca pairs = `pairs' : `outcome'_1
		estadd sca firms = `firms' : `outcome'_1
		estadd sca workers = `workers' : `outcome'_1

		*Cumulation (second 10 years)
		ereturn post coef1 var1, esample(esample1)
		local denom = 0
		local startseconde = $maxfirste + 1
		forvalues e = `startseconde'/$maxseconde {
			local denom = `denom' + ${earnings_ft_defl`e'} *(($R )^(-`e'))	
		}
		local command_2 "0"
		forvalues e = `startseconde'/$maxseconde {  // 
			local exp = ${R}^(-`e')
			local command_2 = "`command_2' + ${earnings_ft_defl`e'} * (1 + $sdu * `e'.potential_experience#c.$U ) * 100 * ( `exp' / `denom')"
		}
		distinct betnr if e(sample)
		local firms = r(ndistinct)
		distinct persnr if e(sample)
		local workers = r(ndistinct)
		distinct persnr betnr if e(sample)
		local pairs = r(ndistinct)
		eststo `outcome'_2: lincomest `command_2' - 100 
		estadd sca pairs = `pairs' : `outcome'_2
		estadd sca firms = `firms' : `outcome'_2
		estadd sca workers = `workers' : `outcome'_2
			
	}
	
cap drop esample1
cap drop esample2
	
}

*FINAL RESULTS for a given combination

esttab log_earnings_ft_defl_1 firm_fe_1 rents_1 cd_1 nonfirm_1, sca(firms workers pairs) mti b(2) se(2) title("FINAL RESULTS FOR 1. decade: U-Rate = ${U}; AKM-Value window = ${windows}") star(* 0.1 ** .05 *** 0.01)

esttab log_earnings_ft_defl_2 firm_fe_2 rents_2 cd_2 nonfirm_2, sca(firms workers pairs) mti b(2) se(2) title("FINAL RESULTS FOR 2. decade: U-Rate = ${U}; AKM-Value window = ${windows}") star(* 0.1 ** .05 *** 0.01)

	/*This is transposed from the paper -- each column is earn, firm_fe, rents, cd, non_firm*/

dis "$S_DATE" " " "$S_TIME"
cap log close
	
} 

clear
cap log close
